program define selection_endog_oax, eclass 
	version 12.1
	syntax varlist(min=1) [if] [in], GEN(name) REFvar(varname) XMIN(real) XMAX(real) TITLEline(string) FILEname(string) IV(varlist) WAGESEEN(varname) DEP(varname) BOOTSTRAP(integer) STUBNEW(string) [HOMEgroup(integer 0) GRID(integer 50) CDFOFF(integer 0) SUBPREDD(integer 0) SUBPREDVAR(varname) SMOOTHOFF(integer 0) SLEEP(integer 0)]

	*****************************
	* selection_endog_oax
	*
	* Steps performed, in order:
	*   1. probit of -wageseen- on -iv- and -varlist-; an inverse-Mills-type
	*      term is built from the linear prediction.
	*   2. OLS of -dep- on -varlist- and that term within refvar==homegroup.
	*   3. The term is set to 0 and the fitted value is stored in -gen-
	*      (optionally blanked where subpredvar!=1 when subpredd==1).
	*   4. Two -oaxaca- decompositions of -gen- (refvar 0 vs 1, refvar 2 vs 1).
	*   5. Pairwise -ksmirnov- tests of -gen- across all levels of -refvar-.
	*   6. Unless cdfoff!=0: empirical (and, unless smoothoff!=0, kernel-
	*      smoothed) CDF of -gen- per level on a grid of -grid- points,
	*      saved to ${output}/output_<filename>.dta.
	*   7. Pairwise calls to -ks_manual_DiD- against the base-run variables
	*      <stubnew>_group / <stubnew>_gen.
	*   8. All scalars are posted as a one-row e(b).
	*
	* Side effects: (re)creates -gen-; drops <stubnew>_* and, when
	* bootstrap==0, re-creates <stubnew>_group/_gen/_sel; defines the global
	* matrices A, ksmirnov_*, KS_DiD* and b.
	*
	* NOTE(review): the return vector reads cells [1,2], [1,3] and [2,3] of
	* the pairwise matrices, so it is only meaningful with at least three
	* levels of -refvar-.
	*****************************

tempvar tempx tempy tempy_smooth
capture drop `gen'

local N_iterations=50

	*****************************
	* SAMPLE MARKER
	*****************************

	* The user's [if] [in] is evaluated once into an indicator. Appending
	* "& ..." to the raw `if' text, as was done before, mis-parses any
	* condition containing | (because & binds tighter) and breaks on quoted
	* string literals inside the condition.

tempvar touse
mark `touse' `if' `in'

	*****************************
	* SELECTION EQUATION (PROBIT)
	*****************************

tempvar xbhat mills

probit `wageseen' `iv' `varlist' if `touse', iterate(`N_iterations')
local N_probit=e(N)

predict `xbhat' if e(sample)==1, xb
qui gen `mills'=normalden(`xbhat')/normal(`xbhat')

	*****************************
	* WAGE REGRESSION ON THE HOME GROUP
	*****************************

reg `dep' `varlist' `mills' if `touse' & `refvar'==`homegroup'

local N_wagereg=e(N)
local R2_wagereg=e(r2)

	*****************************
	* PREDICT
	*****************************

	* The selection term is zeroed for every observation before predicting,
	* so the fitted value excludes its contribution.

replace `mills'=0

qui predict `gen' if `touse' & `refvar'!=.

	*************************
	* NOW RESTRICT -gen-, IF SUB_PRED_D==1
	*************************

if `subpredd'==1 {
	tab `refvar' `subpredvar' if `gen'!=., m
	replace `gen'=. if `subpredvar'!=1 & `gen'!=.
}

qui sum `gen'
local N_pred=r(N)

	*****************************
	* RUN OAXACA
	*****************************

	*****************************
	* HOME VERSUS EQUAL
	*****************************

#delimit ;
oaxaca `gen'       (unigrade: finalgr finalgr2) (subject: subject_final_d2-subject_final_d24) (unife: stabucode_final_combuni_d2-stabucode_final_combuni_d74)
		   bachelor (further: phdcompl_r further_nonphd_completed_r) (schoolgr: schoolgr schoolgr2) appr samestate_hs_uni_first
		   (ageexp: age age2 exp_r exp2_r) female (partnerchild: married_r partner_r child_r) (parents: edumot edufat fatself fatempl fatpubser fatwork motself motempl motpubser motwork)
		   (cohort: wave_d2-wave_d4)
		   if (`refvar'==0 | `refvar'==1)
, by(ineq_group0) relax
;
#delimit cr

	* Matrix A: one row per reported term (row id, estimate, lower and upper
	* bound built from invttail(e(N),0.025) times the standard error).

#delimit ;
matrix A = (14, _b[overall:difference], _b[overall:difference] - invttail(e(N),0.025)*_se[overall:difference], _b[overall:difference] + invttail(e(N),0.025)*_se[overall:difference] \
	    13, _b[endowments:unigrade], _b[endowments:unigrade] - invttail(e(N),0.025)*_se[endowments:unigrade], _b[endowments:unigrade] + invttail(e(N),0.025)*_se[endowments:unigrade] \
	    12, _b[endowments:subject], _b[endowments:subject] - invttail(e(N),0.025)*_se[endowments:subject], _b[endowments:subject] + invttail(e(N),0.025)*_se[endowments:subject] \
	    11, _b[endowments:unife], _b[endowments:unife] - invttail(e(N),0.025)*_se[endowments:unife], _b[endowments:unife] + invttail(e(N),0.025)*_se[endowments:unife] \
	    10, _b[endowments:bachelor], _b[endowments:bachelor] - invttail(e(N),0.025)*_se[endowments:bachelor], _b[endowments:bachelor] + invttail(e(N),0.025)*_se[endowments:bachelor] \
	     9, _b[endowments:further], _b[endowments:further] - invttail(e(N),0.025)*_se[endowments:further], _b[endowments:further] + invttail(e(N),0.025)*_se[endowments:further] \
	     8, _b[endowments:schoolgr], _b[endowments:schoolgr] - invttail(e(N),0.025)*_se[endowments:schoolgr], _b[endowments:schoolgr] + invttail(e(N),0.025)*_se[endowments:schoolgr] \
	     7, _b[endowments:appr], _b[endowments:appr] - invttail(e(N),0.025)*_se[endowments:appr], _b[endowments:appr] + invttail(e(N),0.025)*_se[endowments:appr] \
	     6, _b[endowments:samestate_hs_uni_first], _b[endowments:samestate_hs_uni_first] - invttail(e(N),0.025)*_se[endowments:samestate_hs_uni_first], _b[endowments:samestate_hs_uni_first] + invttail(e(N),0.025)*_se[endowments:samestate_hs_uni_first] \
	     5, _b[endowments:ageexp], _b[endowments:ageexp] - invttail(e(N),0.025)*_se[endowments:ageexp], _b[endowments:ageexp] + invttail(e(N),0.025)*_se[endowments:ageexp] \
	     4, _b[endowments:female], _b[endowments:female] - invttail(e(N),0.025)*_se[endowments:female], _b[endowments:female] + invttail(e(N),0.025)*_se[endowments:female] \
	     3, _b[endowments:partnerchild], _b[endowments:partnerchild] - invttail(e(N),0.025)*_se[endowments:partnerchild], _b[endowments:partnerchild] + invttail(e(N),0.025)*_se[endowments:partnerchild] \
	     2, _b[endowments:parents], _b[endowments:parents] - invttail(e(N),0.025)*_se[endowments:parents], _b[endowments:parents] + invttail(e(N),0.025)*_se[endowments:parents] \
	     1, _b[endowments:cohort], _b[endowments:cohort] - invttail(e(N),0.025)*_se[endowments:cohort], _b[endowments:cohort] + invttail(e(N),0.025)*_se[endowments:cohort] 
)
;
#delimit cr

local oax01_difference=_b[overall:difference]
local oax01_unigrade=_b[endowments:unigrade]
local oax01_subject=_b[endowments:subject]
local oax01_unife= _b[endowments:unife]
local oax01_bachelor= _b[endowments:bachelor]
local oax01_further= _b[endowments:further]
local oax01_schoolgr= _b[endowments:schoolgr]
local oax01_appr= _b[endowments:appr]
local oax01_samestate_hs_uni_first= _b[endowments:samestate_hs_uni_first]
local oax01_ageexp= _b[endowments:ageexp]
local oax01_female= _b[endowments:female]
local oax01_partnerchild= _b[endowments:partnerchild]
local oax01_parents= _b[endowments:parents]
local oax01_cohort= _b[endowments:cohort]

	*****************************
	* HOME VERSUS UNEQUAL
	*****************************

#delimit ;
oaxaca `gen'       (unigrade: finalgr finalgr2) (subject: subject_final_d2-subject_final_d24) (unife: stabucode_final_combuni_d2-stabucode_final_combuni_d74)
		   bachelor (further: phdcompl_r further_nonphd_completed_r) (schoolgr: schoolgr schoolgr2) appr samestate_hs_uni_first
		   (ageexp: age age2 exp_r exp2_r) female (partnerchild: married_r partner_r child_r) (parents: edumot edufat fatself fatempl fatpubser fatwork motself motempl motpubser motwork)
		   (cohort: wave_d2-wave_d4)
		   if (`refvar'==2 | `refvar'==1)
, by(ineq_group0) relax swap
;
#delimit cr

	* Matrix A is overwritten with the same layout for this decomposition.

#delimit ;
matrix A = (14, _b[overall:difference], _b[overall:difference] - invttail(e(N),0.025)*_se[overall:difference], _b[overall:difference] + invttail(e(N),0.025)*_se[overall:difference] \
	    13, _b[endowments:unigrade], _b[endowments:unigrade] - invttail(e(N),0.025)*_se[endowments:unigrade], _b[endowments:unigrade] + invttail(e(N),0.025)*_se[endowments:unigrade] \
	    12, _b[endowments:subject], _b[endowments:subject] - invttail(e(N),0.025)*_se[endowments:subject], _b[endowments:subject] + invttail(e(N),0.025)*_se[endowments:subject] \
	    11, _b[endowments:unife], _b[endowments:unife] - invttail(e(N),0.025)*_se[endowments:unife], _b[endowments:unife] + invttail(e(N),0.025)*_se[endowments:unife] \
	    10, _b[endowments:bachelor], _b[endowments:bachelor] - invttail(e(N),0.025)*_se[endowments:bachelor], _b[endowments:bachelor] + invttail(e(N),0.025)*_se[endowments:bachelor] \
	     9, _b[endowments:further], _b[endowments:further] - invttail(e(N),0.025)*_se[endowments:further], _b[endowments:further] + invttail(e(N),0.025)*_se[endowments:further] \
	     8, _b[endowments:schoolgr], _b[endowments:schoolgr] - invttail(e(N),0.025)*_se[endowments:schoolgr], _b[endowments:schoolgr] + invttail(e(N),0.025)*_se[endowments:schoolgr] \
	     7, _b[endowments:appr], _b[endowments:appr] - invttail(e(N),0.025)*_se[endowments:appr], _b[endowments:appr] + invttail(e(N),0.025)*_se[endowments:appr] \
	     6, _b[endowments:samestate_hs_uni_first], _b[endowments:samestate_hs_uni_first] - invttail(e(N),0.025)*_se[endowments:samestate_hs_uni_first], _b[endowments:samestate_hs_uni_first] + invttail(e(N),0.025)*_se[endowments:samestate_hs_uni_first] \
	     5, _b[endowments:ageexp], _b[endowments:ageexp] - invttail(e(N),0.025)*_se[endowments:ageexp], _b[endowments:ageexp] + invttail(e(N),0.025)*_se[endowments:ageexp] \
	     4, _b[endowments:female], _b[endowments:female] - invttail(e(N),0.025)*_se[endowments:female], _b[endowments:female] + invttail(e(N),0.025)*_se[endowments:female] \
	     3, _b[endowments:partnerchild], _b[endowments:partnerchild] - invttail(e(N),0.025)*_se[endowments:partnerchild], _b[endowments:partnerchild] + invttail(e(N),0.025)*_se[endowments:partnerchild] \
	     2, _b[endowments:parents], _b[endowments:parents] - invttail(e(N),0.025)*_se[endowments:parents], _b[endowments:parents] + invttail(e(N),0.025)*_se[endowments:parents] \
	     1, _b[endowments:cohort], _b[endowments:cohort] - invttail(e(N),0.025)*_se[endowments:cohort], _b[endowments:cohort] + invttail(e(N),0.025)*_se[endowments:cohort] 
)
;
#delimit cr

local oax12_difference=_b[overall:difference]
local oax12_unigrade=_b[endowments:unigrade]
local oax12_subject=_b[endowments:subject]
local oax12_unife= _b[endowments:unife]
local oax12_bachelor= _b[endowments:bachelor]
local oax12_further= _b[endowments:further]
local oax12_schoolgr= _b[endowments:schoolgr]
local oax12_appr= _b[endowments:appr]
local oax12_samestate_hs_uni_first= _b[endowments:samestate_hs_uni_first]
local oax12_ageexp= _b[endowments:ageexp]
local oax12_female= _b[endowments:female]
local oax12_partnerchild= _b[endowments:partnerchild]
local oax12_parents= _b[endowments:parents]
local oax12_cohort= _b[endowments:cohort]

	*****************************
	* TEST (PAIRWISE KOLMOGOROV-SMIRNOV)
	*****************************

qui levelsof `refvar', local(levels) 
local sizeJ : word count `levels'

matrix define ksmirnov_pval=J(`sizeJ',`sizeJ',.)
matrix define ksmirnov_stat=J(`sizeJ',`sizeJ',.)
matrix define ksmirnov_stat1s=J(`sizeJ',`sizeJ',.)
matrix define ksmirnov_pval1s=J(`sizeJ',`sizeJ',.)
matrix define ksmirnov_stat1srev=J(`sizeJ',`sizeJ',.)
matrix define ksmirnov_pval1srev=J(`sizeJ',`sizeJ',.)

	* Results go to cell [position of ll2, position of ll1]; the diagonal
	* stays missing. The pair indicator is a tempvar so that it cannot
	* collide with a dataset variable.

tempvar pair_d

local col=1

foreach ll1 of local levels {

	local row=1

	foreach ll2 of local levels {	
			
		if `ll1'!=`ll2' {
			qui gen `pair_d'=1 if `refvar'==`ll1'
			qui replace `pair_d'=0 if `refvar'==`ll2'

			qui ksmirnov `gen' if `refvar'==`ll1' | `refvar'==`ll2', by(`pair_d') /* exact */
			local pval=r(p_cor)
			local stat=r(D)
			local stat1s=r(D_1)
			local pval1s=r(p_1)
			local stat1srev=r(D_2)
			local pval1srev=r(p_2)
			matrix ksmirnov_pval[`row',`col'] = `pval'
			matrix ksmirnov_stat[`row',`col'] = `stat'
			matrix ksmirnov_stat1s[`row',`col'] = `stat1s'
			matrix ksmirnov_pval1s[`row',`col'] = `pval1s'
			matrix ksmirnov_stat1srev[`row',`col'] = `stat1srev'
			matrix ksmirnov_pval1srev[`row',`col'] = `pval1srev'
			drop `pair_d'
		}
		
		local row=`row'+1

	}
	
	local col=`col'+1

}

if `cdfoff'==0 {

	*****************************
	* COMPUTE CDF
	*****************************

local h_factor=0.6

	* Grid points are stored in the first -grid- observations.

qui gen double `tempx'=`xmin' + (_n/`grid')*(`xmax'-`xmin') in 1/`grid'
label var `tempx' "predicted home wage according to covariates"

tempvar below below_smooth

foreach ll1 of local levels {

	qui gen double `tempy'=.
	qui gen double `tempy_smooth'=.
	
		*********************************
		* CHOOSE BANDWIDTH
		*********************************
	
	qui sum `gen' if `gen'!=. &  `refvar'==`ll1' 
	local sd=r(sd)
	local N=r(N)
	local h_cdf=`h_factor'*1.06*`sd'*(`N'^(-1/5))
	
		*********************************
		* END BANDWIDTH
		*********************************
	
	forval nn=1/`grid' {

		* Empirical CDF at grid point nn: share of the group with gen <= x.
		qui gen `below'=(`gen'<=`tempx'[`nn']) if `gen'!=. & `tempx'[`nn']!=. &  `refvar'==`ll1'
		qui sum `below', meanonly
		qui replace `tempy'=r(mean) if _n==`nn'
		drop `below'
	
		if `smoothoff'==0 {
			* Smoothed CDF: mean of normal((x - gen)/h) over the group.
			qui gen `below_smooth'=normal((-`gen'+`tempx'[`nn'])/`h_cdf') if `gen'!=. & `tempx'[`nn']!=. &  `refvar'==`ll1'
			qui sum `below_smooth', meanonly
			qui replace `tempy_smooth'=r(mean) if _n==`nn'
			drop `below_smooth'
		}
	}
	
	* Park this level's curves under level-specific temporary names so the
	* working names can be reused for the next level.
	tempvar tempy_`ll1'
	rename `tempy' `tempy_`ll1''
	
	tempvar tempy_smooth_`ll1'
	rename `tempy_smooth' `tempy_smooth_`ll1''

}

} /* cdfoff */

	*****************************
	* IMPORT DATA FROM BASE RUN
	*****************************

capture drop `stubnew'_*

if `bootstrap'==1 {
	* The Mata vectors base_group/base_gen/base_sel are loaded under the
	* <stubnew>_ names that the code below reads. With stubnew "base" this
	* is the same as loading them under their own names.
	getmata `stubnew'_group=base_group `stubnew'_gen=base_gen `stubnew'_sel=base_sel, replace force
}
if `bootstrap'==0 {
	qui gen `stubnew'_group=`refvar'
	qui gen `stubnew'_gen=0
	qui gen `stubnew'_sel=`refvar'!=.
}

	*********************************
	* MANUAL KS-TEST
	*********************************

matrix define KS_DiDmax=J(`sizeJ',`sizeJ',.)
matrix define KS_DiDabs=J(`sizeJ',`sizeJ',.)
matrix define KS_DiDmaxrev=J(`sizeJ',`sizeJ',.)

tempvar ks_group_d ks_sel_d base_group_d base_sel_d

local col=1

foreach ll1 of local levels {

local row=1

foreach ll2 of local levels {

	********************
	* START MANUAL KS-TEST
	********************

qui gen `ks_group_d'=(`refvar'==`ll1') if ((`refvar'==`ll1') | (`refvar'==`ll2'))
qui gen `ks_sel_d'=((`refvar'==`ll1') | (`refvar'==`ll2')) & `gen'!=.

qui gen `base_group_d'=(`stubnew'_group==`ll1') if ((`stubnew'_group==`ll1') | (`stubnew'_group==`ll2'))
qui gen `base_sel_d'=((`stubnew'_group==`ll1') | (`stubnew'_group==`ll2')) & `stubnew'_gen!=.

ks_manual_DiD, d(`ks_group_d') x(`gen') sel(`ks_sel_d') dbase(`base_group_d') xbase(`stubnew'_gen) selbase(`base_sel_d')
local ks_DiD_abs=r(ks_DiD_abs)
local ks_DiD_0lt1=r(ks_DiD_0lt1)
local ks_DiD_1lt0=r(ks_DiD_1lt0)


matrix KS_DiDmax[`row',`col']=`ks_DiD_0lt1'
matrix KS_DiDabs[`row',`col']=`ks_DiD_abs'
matrix KS_DiDmaxrev[`row',`col']=`ks_DiD_1lt0'

	* All four indicators are dropped so nothing accumulates across pairs.
drop `ks_group_d' `ks_sel_d' `base_group_d' `base_sel_d'

	********************
	* END MANUAL KS-TEST
	********************

local row=`row'+1

} /* ll2 */
	
local col=`col'+1
 
} /* ll1 */

	*****************************
	* EXPORT DATA
	*****************************

if `cdfoff'==0 {

preserve
	qui gen x_cdf=`tempx'
	local txt3="x_cdf"
	foreach ll1 of local levels {
		qui gen y_cdf_`ll1'=`tempy_`ll1'' 
		qui gen y_cdf_smooth_`ll1'=`tempy_smooth_`ll1''
		local txt3="`txt3' y_cdf_`ll1' y_cdf_smooth_`ll1'"
	}
	keep `txt3'
	qui keep if _n<=`grid'
	di "Note: output saved as output_`filename'. $S_DATE, $S_TIME"
	save "${output}/output_`filename'.dta", replace
restore

} /* cdfoff */

	*****************************
	* EXPORT STUBNEW
	*****************************

capture drop `stubnew'_*

if `bootstrap'==0 {	
	qui gen `stubnew'_group=`refvar'
	qui gen `stubnew'_gen = `gen'
	qui gen `stubnew'_sel=`refvar'!=. & `gen'!=.

}

	*****************************
	* PREPARE RETURN VECTOR
	*****************************

local home_unequal_KS=ksmirnov_stat[2,3]
local equal_home_KS=ksmirnov_stat[1,2]
local equal_unequal_KS=ksmirnov_stat[1,3]

local home_unequal_KS_pval=ksmirnov_pval[2,3]
local equal_home_KS_pval=ksmirnov_pval[1,2]
local equal_unequal_KS_pval=ksmirnov_pval[1,3]

local home_unequal_KS1s=ksmirnov_stat1s[2,3]
local equal_home_KS1s=ksmirnov_stat1s[1,2]
local equal_unequal_KS1s=ksmirnov_stat1s[1,3]

local home_unequal_KS1srev=ksmirnov_stat1srev[2,3]
local equal_home_KS1srev=ksmirnov_stat1srev[1,2]
local equal_unequal_KS1srev=ksmirnov_stat1srev[1,3]

local home_unequal_KS_pval1s=ksmirnov_pval1s[2,3]
local equal_home_KS_pval1s=ksmirnov_pval1s[1,2]
local equal_unequal_KS_pval1s=ksmirnov_pval1s[1,3]

local home_unequal_KS_pval1srev=ksmirnov_pval1srev[2,3]
local equal_home_KS_pval1srev=ksmirnov_pval1srev[1,2]
local equal_unequal_KS_pval1srev=ksmirnov_pval1srev[1,3]

local home_unequal_KS_DiDmax=KS_DiDmax[2,3]
local equal_home_KS_DiDmax=KS_DiDmax[1,2]
local equal_unequal_KS_DiDmax=KS_DiDmax[1,3]

local home_unequal_KS_DiDmaxrev=KS_DiDmaxrev[2,3]
local equal_home_KS_DiDmaxrev=KS_DiDmaxrev[1,2]
local equal_unequal_KS_DiDmaxrev=KS_DiDmaxrev[1,3]

local home_unequal_KS_DiDabs=KS_DiDabs[2,3]
local equal_home_KS_DiDabs=KS_DiDabs[1,2]
local equal_unequal_KS_DiDabs=KS_DiDabs[1,3]

	***************
	* PREPARE FOR OUTPUT
	***************

matrix define b=( `equal_home_KS',`home_unequal_KS',`equal_unequal_KS', ///
`equal_home_KS1s',`home_unequal_KS1s',`equal_unequal_KS1s', ///
`equal_home_KS1srev',`home_unequal_KS1srev',`equal_unequal_KS1srev', ///
`equal_home_KS_pval',`home_unequal_KS_pval',`equal_unequal_KS_pval', ///
`equal_home_KS_pval1s',`home_unequal_KS_pval1s',`equal_unequal_KS_pval1s', ///
`equal_home_KS_pval1srev',`home_unequal_KS_pval1srev',`equal_unequal_KS_pval1srev', ///
`home_unequal_KS_DiDabs',`equal_home_KS_DiDabs',`equal_unequal_KS_DiDabs', ///
`home_unequal_KS_DiDmax',`equal_home_KS_DiDmax',`equal_unequal_KS_DiDmax', ///
`home_unequal_KS_DiDmaxrev',`equal_home_KS_DiDmaxrev',`equal_unequal_KS_DiDmaxrev', ///
`N_wagereg',`R2_wagereg',`N_pred',`N_probit', ///
`oax01_difference', `oax01_unigrade', `oax01_subject', `oax01_unife', `oax01_bachelor', `oax01_further', ///
`oax01_schoolgr', `oax01_appr', `oax01_samestate_hs_uni_first', `oax01_ageexp', ///
`oax01_female', `oax01_partnerchild', `oax01_parents', `oax01_cohort', ///
`oax12_difference', `oax12_unigrade',`oax12_subject', `oax12_unife', `oax12_bachelor', `oax12_further', ///
`oax12_schoolgr', `oax12_appr', `oax12_samestate_hs_uni_first', `oax12_ageexp', ///
`oax12_female', `oax12_partnerchild', `oax12_parents', `oax12_cohort')

matrix colnames b="equal_home_KS" "home_unequal_KS" "equal_unequal_KS" ///
"equal_home_KS1s" "home_unequal_KS1s" "equal_unequal_KS1s" ///
"equal_home_KS1srev" "home_unequal_KS1srev" "equal_unequal_KS1srev" ///
"equal_home_KS_pval" "home_unequal_KS_pval" "equal_unequal_KS_pval" ///
"equal_home_KS_pval1s" "home_unequal_KS_pval1s" "equal_unequal_KS_pval1s" ///
"equal_home_KS_pval1srev" "home_unequal_KS_pval1srev" "equal_unequal_KS_pval1srev" ///
"home_unequal_KS_DiDabs" "equal_home_KS_DiDabs" "equal_unequal_KS_DiDabs" ///
"home_unequal_KS_DiDmax" "equal_home_KS_DiDmax" "equal_unequal_KS_DiDmax" ///
"home_unequal_KS_DiDmaxrev" "equal_home_KS_DiDmaxrev" "equal_unequal_KS_DiDmaxrev" ///
"N_wagereg" "R2_wagereg" "N_pred" "N_probit" ///
///
"oax01_difference" "oax01_unigrade" "oax01_subject" "oax01_unife" "oax01_bachelor" "oax01_further" ///
"oax01_schoolgr" "oax01_appr" "oax01_samestate_hs_uni_first" "oax01_ageexp" ///
"oax01_female" "oax01_partnerchild" "oax01_parents" "oax01_cohort" ///
"oax12_difference" "oax12_unigrade" "oax12_subject" "oax12_unife" "oax12_bachelor" "oax12_further" ///
"oax12_schoolgr" "oax12_appr" "oax12_samestate_hs_uni_first" "oax12_ageexp" ///
"oax12_female" "oax12_partnerchild" "oax12_parents" "oax12_cohort" 

	*****************************
	* PREPARE -ERETURN-
	*****************************

	* The estimation-sample flag is a tempvar (it used to be a variable
	* literally named "sample", which fails if the dataset has one).

tempvar esamp
qui gen `esamp'=(`gen'!=.)
ereturn post b, esample(`esamp')

if `sleep'>0 {
	sleep `sleep'
}

end

	*
